# "Media's Influence on LGBTQ Support Across Africa" by Stephen Winkler

# Code to build the dataset used for analysis, creates file:
#   - `data/afrobarometer.rds`

# Start from an empty global environment. This only removes objects; it does
# not detach packages or reset options set earlier in the session.
rm(list=ls()) # clear environment
# The relative path is resolved against the current working directory.
source("Rprofile.R") # setup Rprofile (load packages, etc.)

# Fix the RNG seed. NOTE(review): no random-number call is visible in this
# script; presumably kept for reproducibility -- confirm whether it is needed.
set.seed(6906859)

## Afrobarometer Data

# Path to the raw Afrobarometer SPSS file
AFROBAROMETER_FILE <- project_path("data-raw", "afrobarometer",
                                   "merged_r6_data_2016_36countries2.sav")

# Load Afrobarometer data
#
# All Afrobarometer variable names are upper-case by default, so lower-case
# them. Since this is SPSS data, convert the labelled columns to actual
# factors (see the haven documentation for the difference).
#
# `as_factor` is passed to `mutate_if()` directly rather than wrapped in
# `funs()`, which dplyr deprecated in 0.8.0; the result is the same.
afrobarometer <-
  read_spss(AFROBAROMETER_FILE) %>%
  {set_names(., str_to_lower(names(.)))} %>%
  mutate_if(.predicate = is.labelled, .funs = as_factor)

# Replace common non-response levels with missing values.
#
# Thin wrapper around `fct_rm_levels()`, which is not defined in this file
# (presumably provided by the setup script -- TODO confirm).
#
# @param x vector to clean; presumably a factor, since callers in this script
#   pass the result on to forcats functions.
# @param lvls character vector of level labels to replace with missing values.
# @return Whatever `fct_rm_levels(x, lvls)` returns; callers in this script
#   treat the result as a factor.
responses_to_NA <- function(x, lvls = c("Don't know", "missing", 
                                        "refused to answer")) {
  fct_rm_levels(x, lvls)
}

# NOTE: a change in packages after publication appears to affect this function.
# it may no longer remove the "NA" level from factors. 

# Extract relevant variables & clean them

# Sexuality: q89c with its non-response answers set to missing.
#   - `sexuality`  keeps the ordinal answer scale.
#   - `sexuality2` collapses that scale to a logical: TRUE for "would not
#     care" or either "like" answer, FALSE for either "dislike" answer.
# The logical is produced by relabelling the levels and coercing, not with
# %in%, because %in% would turn NAs into FALSE when they should stay missing.
# The doubled spaces inside some level names are reproduced on purpose;
# presumably they match the labels in the raw data.
afrobarometer %<>%
  mutate(
    sexuality = responses_to_NA(
      q89c,
      c("Don't know", "Refused", "Missing", "Not asked in this country")
    ),
    sexuality2 = as.logical(fct_recode(
      sexuality,
      "TRUE" = "Would not care",
      "TRUE" = "Somewhat  like",
      "TRUE" = "Strongly like",
      "FALSE" = "Somewhat dislike",
      "FALSE" = "Strongly  dislike"
    ))
  )

# Demographic and socio-economic covariates.
#
# Unless noted otherwise, each variable is the raw survey item with its
# non-response levels set to missing. Every variable is derived from raw
# columns only, so they are all built in a single mutate() call.
# `ethnicity` is assigned once here; a second, equivalent assignment from q87
# was redundant and has been dropped.
afrobarometer %<>%
  mutate(
    # Gender (q101). Note that %in% maps a missing q101 to FALSE, not NA.
    female = q101 %in% c("Female"),

    # Education (q97)
    education = responses_to_NA(q97, c("Refused", "Missing", "Don't know")),

    # Religion (q98a)
    religion = responses_to_NA(q98a, c("Refused", "Missing", "Don't know")),

    # Religion, binned (relig_cond)
    religion_bin = responses_to_NA(relig_cond, c("Don't know")),

    # Religiosity (q98b). Assume that having no religion and never attending
    # a service are similar, so fold the former into "Never".
    religiosity = responses_to_NA(
      q98b, c("Missing", "Don't know", "Refused", "Layene")
    ),
    religiosity = fct_recode(religiosity,
                             "Never" = "Respondent has no religion"),

    # Home language (q2)
    lang_home = responses_to_NA(q2, c("Refused", "Missing", "Don't know")),

    # Language used (q103)
    lang_used = responses_to_NA(q103, c("Refused", "Missing", "Don't know")),

    # Ethnic group (q87)
    ethnicity = responses_to_NA(
      q87, c("Refused to answer", "Missing", "Don't know")
    ),

    # Age (q1)
    age = responses_to_NA(q1, c("Refused", "Missing", "Don't know")),

    # Income proxy: roof material (q105)
    income_roof = responses_to_NA(
      q105, c("Could not tell / could not see", "Missing")
    ),

    # Income proxy: water source (q93a), with the listed levels moved to the
    # front in this order
    income_water = responses_to_NA(
      q93a, c("Missing", "Refused", "Don't know")
    ),
    income_water = fct_relevel(
      income_water,
      c("Outside the compound", "Inside the compound", "Inside the house")
    ),

    # Urban (urbrur); %in% maps a missing urbrur to FALSE
    urban = urbrur %in% c("Urban")
  )

# Media consumption items (q12a-q12e): how the respondent answered for each
# news source, with the non-response levels set to missing.
afrobarometer %<>%
  mutate(
    # Radio (q12a)
    radio = responses_to_NA(q12a, c("Missing", "Don't know", "Refused")),
    # TV (q12b)
    tv = responses_to_NA(q12b, c("Missing", "Don't know", "Refused")),
    # Newspaper (q12c)
    newspaper = responses_to_NA(q12c, c("Missing", "Don't know", "Refused")),
    # Internet (q12d)
    internet = responses_to_NA(q12d, c("Missing", "Don't know", "Refused")),
    # Social media (q12e)
    social_media = responses_to_NA(q12e, c("Missing", "Don't know", "Refused"))
  )

# Media consumption controls
#
# as.numeric() applied to a factor returns its integer level codes, so each
# medium contributes the position of the respondent's answer on its scale
# (this assumes radio, tv, newspaper, internet and social_media are factors).
# A missing answer on any summed item makes the aggregate NA.
#
# The offset on `media_aggregate` is written as a plain subtraction applied to
# the whole sum, instead of piping the last term through `%>% - 4`; the value
# is unchanged.
# NOTE(review): the offset is meant to rescale the aggregate to start at 0,
# but with five items each coded from 1 the minimum would be 1, not 0.
# Confirm the intended offset before changing it. The leave-one-out sums
# below are not offset at all.
afrobarometer %<>%
  mutate(
    # Aggregate all media into a single variable
    media_aggregate = as.numeric(radio) + as.numeric(tv) +
      as.numeric(newspaper) + as.numeric(internet) +
      as.numeric(social_media) - 4,

    # All media except radio
    media_noradio = as.numeric(tv) + as.numeric(newspaper) +
      as.numeric(internet) + as.numeric(social_media),

    # All media except TV
    media_notv = as.numeric(radio) + as.numeric(newspaper) +
      as.numeric(internet) + as.numeric(social_media),

    # All media except newspaper
    media_nopaper = as.numeric(radio) + as.numeric(tv) +
      as.numeric(internet) + as.numeric(social_media),

    # All media except internet
    media_nointernet = as.numeric(radio) + as.numeric(tv) +
      as.numeric(newspaper) + as.numeric(social_media),

    # All media except social media
    media_nosocialmedia = as.numeric(radio) + as.numeric(tv) +
      as.numeric(newspaper) + as.numeric(internet)
  )

# Other tolerance variables (q89a, q89b, q89d, q89e)
#
# For each item two columns are created:
#   - `<name>`  : the ordinal answer with non-response levels set to missing;
#   - `<name>2` : a logical that is TRUE for "would not care" or either "like"
#                 answer and FALSE for either "dislike" answer.
# The logical is produced by relabelling the levels and coercing, not with
# %in%, because %in% would turn NAs into FALSE when they should stay missing.
afrobarometer %<>%
  mutate(
    # Religious tolerance (q89a)
    relig_tol = responses_to_NA(
      q89a,
      c("Don't know", "Refused", "Missing", "Not asked in this country")
    ),
    relig_tol2 = as.logical(fct_recode(
      relig_tol,
      "TRUE" = "Would not care",
      "TRUE" = "Somewhat  like",
      "TRUE" = "Strongly like",
      "FALSE" = "Somewhat dislike",
      "FALSE" = "Strongly  dislike"
    )),

    # Ethnic tolerance (q89b)
    ethnic_tol = responses_to_NA(
      q89b,
      c("Don't know", "Refused", "Missing", "Not asked in this country")
    ),
    ethnic_tol2 = as.logical(fct_recode(
      ethnic_tol,
      "TRUE" = "Would not care",
      "TRUE" = "Somewhat  like",
      "TRUE" = "Strongly like",
      "FALSE" = "Somewhat dislike",
      "FALSE" = "Strongly  dislike"
    )),

    # HIV tolerance (q89d)
    hiv_tol = responses_to_NA(
      q89d,
      c("Don't know", "Refused", "Missing", "Not asked in this country")
    ),
    hiv_tol2 = as.logical(fct_recode(
      hiv_tol,
      "TRUE" = "Would not care",
      "TRUE" = "Somewhat  like",
      "TRUE" = "Strongly like",
      "FALSE" = "Somewhat dislike",
      "FALSE" = "Strongly  dislike"
    )),

    # Immigrant tolerance (q89e)
    immig_tol = responses_to_NA(
      q89e,
      c("Don't know", "Refused", "Missing", "Not asked in this country")
    ),
    immig_tol2 = as.logical(fct_recode(
      immig_tol,
      "TRUE" = "Would not care",
      "TRUE" = "Somewhat  like",
      "TRUE" = "Strongly like",
      "FALSE" = "Somewhat dislike",
      "FALSE" = "Strongly  dislike"
    ))
  )

# Tolerance control variables (ordinal coding)
#
# Each five-point tolerance factor is relabelled so that its levels read "1"
# (strongly dislike) through "5" (strongly like); the scores are then summed.

# Relabel the five answer levels of a tolerance factor as "1".."5".
likert_to_score <- function(x) {
  fct_recode(x,
             "1" = "Strongly  dislike",
             "2" = "Somewhat dislike",
             "3" = "Would not care",
             "4" = "Somewhat  like",
             "5" = "Strongly like")
}

# The sums go through as.character() before as.numeric(): as.numeric() on a
# factor returns the internal level codes, which only coincide with the
# "1".."5" labels when no other level precedes them. Reading the labels makes
# the score independent of the level order. Any value whose level was not
# relabelled becomes NA (with a coercion warning) instead of silently
# receiving an arbitrary code.
afrobarometer %<>%
  mutate(
    sexuality_3 = likert_to_score(sexuality),
    relig_tol3 = likert_to_score(relig_tol),
    ethnic_tol3 = likert_to_score(ethnic_tol),
    hiv_tol3 = likert_to_score(hiv_tol),
    immig_tol3 = likert_to_score(immig_tol),

    # A control variable that aggregates all tolerance questions
    tolerance = as.numeric(as.character(sexuality_3)) +
      as.numeric(as.character(relig_tol3)) +
      as.numeric(as.character(ethnic_tol3)) +
      as.numeric(as.character(hiv_tol3)) +
      as.numeric(as.character(immig_tol3)),

    # A control variable that aggregates all tolerance questions except LGBT
    tolerance_noLGBT = as.numeric(as.character(relig_tol3)) +
      as.numeric(as.character(ethnic_tol3)) +
      as.numeric(as.character(hiv_tol3)) +
      as.numeric(as.character(immig_tol3))
  )

# Tolerance control variables (logical coding)
#
# Same aggregation as the ordinal version, but built from the logical
# tolerance variables: each is turned into a factor with levels "0" (FALSE)
# and "1" (TRUE), and the 0/1 values are summed.
#
# The sums go through as.character() before as.numeric(): as.numeric() on a
# factor returns the internal level codes (1 and 2 here), not the "0"/"1"
# labels, which would shift every respondent's score up by one per item.
# NOTE(review): datasets built before this fix have `tolerance2` higher by 5
# and `tolerance_noLGBT2` higher by 4 than the values produced here.
afrobarometer %<>%
  mutate(
    sexuality_4 = recode_factor(as.factor(sexuality2),
                                "FALSE" = "0",
                                "TRUE" = "1"),
    relig_tol4 = recode_factor(as.factor(relig_tol2),
                               "FALSE" = "0",
                               "TRUE" = "1"),
    ethnic_tol4 = recode_factor(as.factor(ethnic_tol2),
                                "FALSE" = "0",
                                "TRUE" = "1"),
    hiv_tol4 = recode_factor(as.factor(hiv_tol2),
                             "FALSE" = "0",
                             "TRUE" = "1"),
    immig_tol4 = recode_factor(as.factor(immig_tol2),
                               "FALSE" = "0",
                               "TRUE" = "1"),

    # A control variable that aggregates all tolerance questions (0 to 5)
    tolerance2 = as.numeric(as.character(sexuality_4)) +
      as.numeric(as.character(relig_tol4)) +
      as.numeric(as.character(ethnic_tol4)) +
      as.numeric(as.character(hiv_tol4)) +
      as.numeric(as.character(immig_tol4)),

    # A control variable that aggregates all tolerance questions except LGBT
    # (0 to 4)
    tolerance_noLGBT2 = as.numeric(as.character(relig_tol4)) +
      as.numeric(as.character(ethnic_tol4)) +
      as.numeric(as.character(hiv_tol4)) +
      as.numeric(as.character(immig_tol4))
  )

# Interest in public affairs (q13), with non-response levels set to missing
afrobarometer$public_affairs <-
  responses_to_NA(afrobarometer$q13, c("Missing", "Refused", "Don't know"))

## Freedom House Data
# score is numeric from 0-100 where 100 = least free
# status is breakdown of score where 0-30 = Free, 31-60 = Part Free,
# 61-100 = Not free
#
# The file is located with project_path(), like the other inputs and outputs
# of this script, so the read does not depend on the working directory.
freedom_house <-
  read.csv(project_path("data-raw", "freedom house", "freedom_of_press.csv"),
           header = TRUE, na.strings = c("", "NA")) %>%
  rename(fh_score = Total.Score, fh_status = Status,
         fh_net_score = net_score, fh_net_status = net_status) %>%
  dplyr::select(country, fh_score, fh_status)

# Rework the FH score to be more meaningful: a 0:1 scale where 1 = most free.
freedom_house <-
  freedom_house %>%
  mutate(
    # go through character so a factor column yields its labels, not its
    # arbitrary level codes
    fh_score = as.character(fh_score),
    # convert 0-100 to 0:1, then invert so that 1 = more press freedom
    fh_scale = 1.00 - as.numeric(fh_score) / 100,
    country = as.character(country)
  )

# Subset to only African countries
afrob_names <- as.vector(unique(afrobarometer$country)) # Afrob country names
# fix spelling incongruency (mis-encoded "o" with circumflex in the raw file)
freedom_house$country[freedom_house$country=="C\x99te d'Ivoire"] <- "Cote d'Ivoire"
freedom_house_africa <- # keep only countries that appear in Afrob
  subset(freedom_house, country %in% afrob_names)
saveRDS(freedom_house_africa,
        file = project_path("data", "freedom_house_africa.rds"))

## KOF Data: globalization
# Index of globalization for every country
#
# The file is located with project_path(), like the other inputs and outputs
# of this script, so the read does not depend on the working directory.
kof <- read.csv(project_path("data-raw", "kof", "Data_2018.csv")) # raw data

# Rename countries so their names match Afrob
kof$country <- as.character(kof$country)
kof$country[kof$country == 'Egypt, Arab Rep.'] <- "Egypt"
kof$country[kof$country == 'Sao Tome and Principe'] <- "São Tomé and Príncipe"

# Keep only countries in Afrob (`afrob_names`, built in the Freedom House
# section) for the 2015 rows, and only the indicator used in the analysis
kof <- kof %>%
  subset(country %in% afrob_names & year == "2015") %>%
  dplyr::select(country, KOFSoGI)

# Keep only the Afrobarometer variables used in the analysis (exposing
# location.level.1 under the name `district`), then attach the country-level
# indicators from Freedom House and KOF to the Afrobarometer dataset by
# country name.
afrobarometer <-
  afrobarometer %>%
  dplyr::select(
    respno, country,
    district = location.level.1,
    sexuality, sexuality2, female, education, religiosity,
    religion, religion_bin,
    age, income_water, urban,
    radio, tv, newspaper, internet, social_media,
    relig_tol, relig_tol2, ethnic_tol, ethnic_tol2,
    hiv_tol, hiv_tol2, immig_tol, immig_tol2,
    tolerance, tolerance2, tolerance_noLGBT, tolerance_noLGBT2,
    media_aggregate, media_noradio, media_notv, media_nopaper,
    media_nointernet, media_nosocialmedia
  ) %>%
  left_join(freedom_house, by = "country") %>%
  left_join(kof, by = "country")

# Check the assembled dataset before writing it out. `data_exists()` is not
# defined in this file (presumably provided by the setup script -- TODO
# confirm what it checks).
data_exists(afrobarometer)

# Write the analysis dataset to data/afrobarometer.rds
saveRDS(afrobarometer, project_path("data", "afrobarometer.rds"))

# Save as CSV
#write.csv(afrobarometer, "data/afrobarometer.csv")

